#-------------------------------------------------------------------------------
# ANALYSIS OF ADCC DATA
#-------------------------------------------------------------------------------
library(dplyr)
library(magrittr)
library(tidyr)
library(purrr)
library(forcats)
library(ggplot2)
library(stringr)
library(readxl)
library(brms)
library(mgcv)
library(mgcViz)
library(nlme)
library(cowplot)
library(tidybayes)
library(emmeans)
library(writexl)
library(DT)
library(stringi)
source("scripts/utils/utils.R")
modernacolors()## <environment: R_GlobalEnv>
map <- purrr::mapset.seed(456)
d <- readxl::read_xlsx("processed_data/ADCC.xlsx")# Visualize the full dataset ---------------------------------------------------
d_plot <- filter(d, dose > 0) %>%
mutate(dosing_interval_weeks = as.character(dosing_interval_weeks)) %>%
mutate(
dosing_interval_weeks = case_when(
dosing_interval_weeks == "Prime only" ~ dosing_interval_weeks,
dosing_interval_weeks == "1" ~ "1 week",
any(str_detect(
dosing_interval_weeks, as.character(2:8)
)) ~ paste0(dosing_interval_weeks, " weeks")
)
) %>%
mutate(
dosing_interval_weeks = factor(dosing_interval_weeks, levels = c("Prime only", "1 week", paste0(c(
2:4, 6, 8
), " weeks"))),
day = factor(day)) %>%
mutate(
day = fct_recode(
day,
"Prime only" = "56",
"1 wk post-boost" = "64",
"2 wk post-boost" = "73",
"4 wk post-boost" = "87",
"8 wk post-boost" = "115",
"12 wk post-boost" = "143",
"16 wk post-boost" = "171",
"20 wk post-boost" = "199",
"24 wk post-boost" = "227"
),
dosing_interval_weeks = fct_recode(
dosing_interval_weeks,
"1-week interval" = "1 week",
"2-week interval" = "2 weeks",
"3-week interval" = "3 weeks",
"4-week interval" = "4 weeks",
"6-week interval" = "6 weeks",
"8-week interval" = "8 weeks"
)
)
p_raw <- ggplot(d_plot, aes(
x = day,
y = AUC,
fill = day,
group = day
)) +
geom_point(
position = position_dodge(width = 1),
pch = 21,
size = 2,
show.legend = FALSE
) +
stat_summary(
aes(group = time_post_boost),
fill = NA,
color = "grey60",
geom = "pointrange",
position = position_dodge(width = 1),
size = 0.85,
inherit.aes = TRUE,
pch = "-",
fun = ~ 10 ^ (mean(log10(.x))) # geometric mean
) +
scale_fill_manual(
values = c(
modernadarkblue,
modernaorange,
modernapurple,
modernagreen,
modernablue,
modernared,
modernalightgray,
modernalightred,
"grey60"
)
) +
scale_y_continuous(breaks = 10^(2:6),
labels = c(
expression("10" ^ "2"),
expression("10" ^ "3"),
expression("10" ^ "4"),
expression("10" ^ "5"),
expression("10" ^ "6")
), trans = "log10") +
labs(x = "") +
facet_grid(dose ~ dosing_interval_weeks,
labeller = labeller(.rows = ~ paste0("mRNA-1273 ", .x, " ug"))) +
theme_bw() +
theme(
axis.text.x = element_text(angle = 45, hjust=1, vjust = 1)
)
print(p_raw)An alternative view of the data that displays trends across dosing interval instead of across time post-boost.
p_raw2 <- ggplot(d_plot, aes(
x = dosing_interval_weeks,
y = AUC,
fill = day,
group = day
)) +
geom_point(
position = position_dodge(width = 1),
pch = 21,
size = 2,
show.legend = FALSE
) +
scale_fill_manual(
values = c(
modernadarkblue,
modernaorange,
modernapurple,
modernagreen,
modernablue,
modernared,
modernalightgray,
modernalightred,
"grey60"
)
) +
scale_y_continuous(breaks = 10^(2:6),
labels = c(
expression("10" ^ "2"),
expression("10" ^ "3"),
expression("10" ^ "4"),
expression("10" ^ "5"),
expression("10" ^ "6")
), trans = "log10") +
labs(x = "") +
facet_grid(dose ~ day,
labeller = labeller(.rows = ~ paste0("mRNA-1273 ", .x, " ug"))) +
theme_bw() +
theme(
axis.text.x = element_text(angle = 45, hjust=1, vjust = 1)
)
print(p_raw2)Since the data are structured in the same was as the ELISA data, we first apply the same modeling approach that was used for those data. Specifically, we use a generalized additive model (GAM) with a 9-dimensional thin plate spline basis in day. We try 4 different GAMs with varying complexity.
Note that we do not consider animal-specific effects as these are not actually repeated measures data.
Here we visualize the estimated smooths for each of the 4 GAMs.
#-----------------------------------------------------------------------------
# Fit the generalized additive model, excluding PBS and prime only group
#-----------------------------------------------------------------------------
d_mod <-
filter(d, dose > 0) %>%
mutate(dose = factor(dose),
dosing_interval_weeks = fct_drop(dosing_interval_weeks))
gam_fit1 <-
mgcv::gam(
log2(AUC + 1) ~ dose*dosing_interval_weeks +
s(day, k = 9, bs = "tp") +
s(dosing_interval_weeks, day, bs = "re"),
data = d_mod,
method = 'REML'
)
gam_fit2 <-
mgcv::gam(
log2(AUC + 1) ~ dose*dosing_interval_weeks +
s(day, k = 9, bs = "tp", by = dosing_interval_weeks) +
s(dosing_interval_weeks, day, bs = "re"),
data = d_mod,
method = 'REML'
)
gam_fit3 <-
mgcv::gam(
log2(AUC + 1) ~ dose*dosing_interval_weeks +
s(day, k = 9, bs = "tp", by = dosing_interval_weeks) +
s(day, k = 9, bs = "tp", by = dose),
data = d_mod,
method = 'REML'
)
gam_fit4 <-
mgcv::gam(
log2(AUC + 1) ~
s(day, k = 9, bs = "tp", by = dosing_interval_weeks) +
s(day, k = 9, bs = "tp", by = dose),
data = d_mod,
method = 'REML'
)
gam_fits <- ls(pattern = "gam_fit[[:digit:]]") %>%
purrr::set_names(.) %>%
purrr::map(~ eval(ensym(.x)))vizs <- purrr::map(gam_fits, ~ mgcViz::getViz(.x, nsim = 100))
map(1:2, ~ {
plot(sm(vizs[[1]], .x)) +
l_fitLine(color = "grey40", lty = 1) +
l_ciPoly(fill = modernablue, alpha= 0.4) +
labs(x = "Day") +
theme_bw()
}) %>%
map("ggObj") %>%
map(~ .x + theme(text = element_text(size = 8))) %>%
plot_grid(plotlist = ., nrow = 1)map(1:8, ~ {
plot(sm(vizs[[2]], .x)) +
l_fitLine(color = "grey40", lty = 1) +
l_ciPoly(fill = modernablue, alpha= 0.4) +
labs(x = "Day") +
theme_bw()
}) %>%
map("ggObj") %>%
map(~ .x + theme(text = element_text(size = 8))) %>%
plot_grid(plotlist = ., nrow = 2)map(1:9, ~ {
plot(sm(vizs[[3]], .x)) +
l_fitLine(color = "grey40", lty = 1) +
l_ciPoly(fill = modernablue, alpha= 0.4) +
labs(x = "Day") +
theme_bw()
}) %>%
map("ggObj") %>%
map(~ .x + theme(text = element_text(size = 8))) %>%
plot_grid(plotlist = ., nrow = 3)map(1:9, ~ {
plot(sm(vizs[[4]], .x)) +
l_fitLine(color = "grey40", lty = 1) +
l_ciPoly(fill = modernablue, alpha= 0.4) +
labs(x = "Day") +
theme_bw()
}) %>%
map("ggObj") %>%
map(~ .x + theme(text = element_text(size = 8))) %>%
plot_grid(plotlist = ., nrow = 3)res_df <-
imap_dfr(gam_fits, ~ {
tibble(
`Pearson residuals` = residuals(.x, type = "pearson"),
`Fitted values` = fitted(.x),
`Observed values` = .x$y
) %>%
bind_cols(d_mod) %>%
mutate(Model = .y)
})
cowplot::plot_grid(
ggplot(res_df, aes(sample = `Pearson residuals`)) +
stat_qq(pch = 21, fill = modernalightgray) +
stat_qq_line(lty = 2, col = modernaorange) +
labs(
x = "Theoretical quantiles",
y = "Sample quantiles",
title = "Q-Q plot for GAMs"
) +
facet_wrap(~ Model, scales = "free") +
theme_bw(),
ggplot(res_df, aes(x = `Fitted values`, y = `Pearson residuals`)) +
geom_point(pch = 21, fill = modernalightgray) +
geom_hline(aes(yintercept = 0), lty = 2, color = modernaorange) +
labs(title = "Residuals vs. Fitted values") +
facet_wrap(~ Model, scales = "free") +
theme_bw(),
ggplot(res_df, aes(x = `Pearson residuals`)) +
geom_histogram() +
labs(title = "Histogram of Pearson Residuals") +
facet_wrap(~ Model, scales = "free") +
theme_bw(),
ggplot(res_df, aes(x = `Fitted values`, y = `Observed values`)) +
geom_point(pch = 21, fill = modernalightgray) +
geom_abline(aes(intercept = 0, slope = 1), lty = 2, color = modernaorange) +
labs(title = "Fitted vs. Observed values") +
facet_wrap(~ Model, scales = "free") +
theme_bw(),
nrow = 2
)imap_dfr(gam_fits, ~ tibble(
Model = .y,
AIC = round(AIC(.x), digits = 2),
BIC = round(BIC(.x), digits = 2)
)) %>%
DT::datatable()It is clear that GAM fit 3 is the preferred model of those explored based off of AIC, BIC, and residual diagnostics. However, one remaining issue with this model is the heterogeneous variance, which is especially visible in the fitted values vs. residuals plot. That is, we observe some heteroskedasticity in the residuals, as variance appears to be higher at earlier timepoints, and lower at later timepoints. This is a challenging feature to incorporate into a frequentist model when we want to obtain well-calibrated uncertainty (for contrast tests) in more complex regression settings. Therefore, we implement a Bayesian GAM akin to the optimal GAM fit 3, while also accounting for heteroskedasticity.
# set up a Bayesian model so that we may have better-calibrated variance estimates
# for hypothesis testing
form <- brms::brmsformula(
# we include the "day" effect here because `brms` treats smooths as "wiggling" about
# the main effects, while `mgcv`does not
log2(AUC + 1) ~ day*dose*dosing_interval_weeks +
s(day, k = 9, bs = "tp", by = dosing_interval_weeks) +
s(day, k = 9, bs = "tp", by = dose),
# model the heteroskedastic variance in day
sigma ~ day
)
# flat (non-informative) prior on regression coefficients and on group effect for day
# student t prior log(sd) of day effects
# student t prior on heteroskedastic log(sd) of data in day
# see this with `get_prior(form, d_mod)`bfit <-
brms::brm(
form,
data = d_mod,
family = "gaussian",
iter = 4000,
warmup = 1500,
seed = 1,
thin = 2,
chains = 4,
cores = 4
)Residual diagnostics from the Bayesian GAM look good, with homoskedastic Pearson residuals. (For the residuals vs. fitted values plot, median residuals plus the 66% and 95% CIs are shown.)
cowplot::plot_grid(
d_mod %>%
add_residual_draws(bfit, ndraws = 50, type = "pearson") %>%
median_qi() %>%
ggplot(aes(sample = .residual)) +
geom_qq(shape = 16, fill = modernalightgray) +
geom_qq_line(lty = 2, col = modernaorange) +
labs(
x = "Theoretical quantiles",
y = "Sample quantiles",
title = "Q-Q plot for Bayesian\nGAM model") +
theme_bw(),
full_join(
tidybayes::add_residual_draws(d_mod, bfit, ndraws = 50, type = "pearson"),
tibble(Estimate = predict(bfit)[,"Estimate"]) %>% mutate(.row = 1:n())
) %>%
ggplot(aes(x = Estimate, y = .residual)) +
tidybayes::stat_pointinterval() +
geom_hline(aes(yintercept = 0), lty = 2, color = modernaorange) +
labs(x = "Fitted values", y = "Pearson residuals") +
theme_bw()
)To further diagnose this model, we also show 2 posterior predictive checks. Panel A displays the observed data distribution vs. 50 random samples from the fitted distribution. Panel B shows the empirical cumulative distribution function of the observed data vs. 50 random samples from the fitted distribution. We observe that our observed distribution lies within the range of distributions sampled from the posterior.
plot_grid(
pp_check(bfit, ndraws = 50),
pp_check(bfit, type = "ecdf_overlay"),
labels = c("A", "B"),
nrow = 1
)The estimated conditional effects for dose and dosing interval are shown.
condEff <- conditional_effects(
bfit,
effects = c("day:dose", "day:dosing_interval_weeks"),
ndraws = 50
)
cond1_plot <- ggplot(condEff[[1]], aes(x = day, y = 2^estimate__-1)) +
geom_line(aes(color = dose)) +
geom_ribbon(aes(fill = dose, ymin = 2^lower__-1, ymax =2^upper__-1), alpha = 0.3) +
labs(fill = "Dose (ug)", color = "Dose (ug)", x = "Day", y = "AUC") +
scale_fill_manual(values = c("grey60", modernablue)) +
scale_color_manual(values = c("grey60", modernablue)) +
scale_y_continuous(trans = "log10") +
theme_bw()
cond2_plot <- ggplot(condEff[[2]], aes(x = day, y = 2^estimate__-1)) +
geom_line(aes(color = dosing_interval_weeks)) +
geom_ribbon(aes(fill = dosing_interval_weeks, ymin = 2^lower__-1, ymax = 2^upper__-1), alpha = 0.3) +
scale_y_continuous(trans = "log10") +
scale_fill_manual(
values = c(
modernadarkblue,
modernaorange,
modernapurple,
modernagreen,
modernablue,
modernared,
modernalightgray,
modernalightred,
"grey60"
)
) +
scale_color_manual(
values = c(
modernadarkblue,
modernaorange,
modernapurple,
modernagreen,
modernablue,
modernared,
modernalightgray,
modernalightred,
"grey60"
)
) +
labs(fill = "Dosing interval\n(weeks)", color = "Dosing interval\n(weeks)", x = "Day", y = "AUC") +
theme_bw()
plot_grid(cond1_plot, cond2_plot, nrow = 1)We proceed with making pairwise group comparisons using the Bayesian GAM.
pred.bayes <- as_tibble(predict(bfit))
trend_plot <- bind_cols(d_mod, pred.bayes) %>%
mutate(dosing_interval_weeks = paste0(dosing_interval_weeks, "-week interval")) %>%
ggplot(aes(x = day, y=2^Estimate)) +
geom_line(aes(color = dosing_interval_weeks)) +
geom_ribbon(
aes(
x = day,
ymin = 2 ^ Q2.5,
ymax = 2 ^ Q97.5,
fill = dosing_interval_weeks
),
alpha = 0.3,
show.legend = FALSE
)+
scale_color_manual(
values = c(
modernadarkblue,
modernaorange,
modernapurple,
modernagreen,
modernablue,
modernared,
"grey60"
)
) +
scale_fill_manual(
values = c(
modernadarkblue,
modernaorange,
modernapurple,
modernagreen,
modernablue,
modernared,
"grey60"
)
) +
scale_y_continuous(
trans = "log10",
breaks = 10 ^ c(1:7),
labels = c(
"10",
expression("10" ^ "2"),
expression("10" ^ "3"),
expression("10" ^ "4"),
expression("10" ^ "5"),
expression("10" ^ "6"),
expression("10" ^ "7")
)
) +
scale_x_continuous(
breaks = c(56 , 64, 73, 87, 115, 143, 171, 199, 226),
labels = c(
"Prime only",
"1 wk post-boost",
"2 wk post-boost",
"4 wk post-boost",
"8 wk post-boost",
"12 wk post-boost",
"16 wk post-boost",
"20 wk post-boost",
"24 wk post-boost"
)
) +
facet_wrap(~dose, labeller = labeller(.cols = ~ paste0("mRNA-1273 ", .x, " ug"))) +
labs(x = "", y = "AUC", color = "") +
theme_bw() +
theme(
axis.text = element_text(size = 12),
axis.text.x = element_text(
angle = 45,
hjust = 1,
vjust = 1
),
axis.title = element_text(size = 14),
strip.text = element_text(size = 18),
legend.text = element_text(size = 14),
legend.title = element_text(size = 15),
plot.title = element_text(size = 20),
legend.position = "bottom"
)
print(trend_plot)These Bayesian \(P\)-values can be interpreted as the probability that group \(X\) has higher activity than group \(Y\) under the model.
epreds <- add_epred_draws(newdata = d_mod, object = bfit)
wide <- epreds %>%
pivot_wider(
id_cols = c("dose", "day"),
names_from = "dosing_interval_weeks",
values_from = ".epred"
) %>%
ungroup()
combos <- names(wide) %>%
magrittr::extract(str_detect(., "[[:digit:]]|Prime")) %>%
combn(2)
group_comparisons <- array_branch(combos, margin = 2) %>%
map_dfr(function(i) {
nm <- group_by(wide, dose) %>%
group_keys() %>%
pull(dose)
group_split(wide, dose) %>%
map_dbl(function(.d, i) {
sub <- dplyr::select(.d, i)
mean(map2_dbl(sub[[1]], sub[[2]], ~mean(.y > .x)))
}, i = i) %>%
tibble("P(x > y)" = .) %>%
mutate(dose = nm, x = i[1], y = i[2])
})
adcc_p_table <- group_comparisons %>%
unite("Contrast", x, y, sep = "-week interval / ") %>%
group_by(Contrast) %>%
mutate(Contrast = ifelse(str_detect(Contrast, "Prime"), Contrast, paste0(Contrast, "-week interval")),
P.adjusted = round(p.adjust(`P(x > y)`, method = "bonferroni"), digits = 4),
`P(x > y)` = round(`P(x > y)`, digits = 4),
.before = 1) %>%
rename("Contrast (x / y)" = "Contrast") %>%
relocate(`Contrast (x / y)`, `P(x > y)`)
DT::datatable(adcc_p_table)Each black dot corresponds to the estimated fold change for the contrasted prime-boost dosing interval. Error bars are 95% CIs on the estimated contrast.
rg <- ref_grid(bfit)
emm_draws <- emmeans(rg, ~ dosing_interval_weeks | dose) %>%
contrast("revpairwise") %>%
gather_emmeans_draws()
emmeans_plot <- summarize(emm_draws,
mean = 2 ^ mean(.value),
lCI = 2 ^ quantile(.value, .05) ) %>%
ggplot(aes(x = mean, y = contrast)) +
geom_point() +
geom_errorbar(aes(xmin = lCI, xmax = Inf)) +
geom_vline(aes(xintercept = 1), col = modernared, lty = 2) +
facet_wrap(
~ dose,
labeller = labeller(.cols = ~ paste0("mRNA-1273", .x, "ug")),
strip.position = "right",
nrow = 2
) +
labs(x = "Estimated fold change", y = "Contrasted prime-boost dosing intervals") +
scale_x_continuous(trans = "log2", breaks = c(1,2,4,16,128)) +
scale_y_discrete(
labels = ~ str_remove_all(.x, "dosing_interval_weeks") %>%
str_replace("-", "/") %>%
stringi::stri_replace_all(regex = "([[:digit:]])", replacement = "$1 weeks") %>%
str_replace(pattern = "1 weeks", "1 week")
) +
theme_bw()
print(emmeans_plot)sessionInfo()## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin19.6.0 (64-bit)
## Running under: macOS Catalina 10.15.7
##
## Matrix products: default
## BLAS: /usr/local/Cellar/openblas/0.3.19/lib/libopenblasp-r0.3.19.dylib
## LAPACK: /usr/local/Cellar/r/4.1.2/lib/R/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] stringi_1.7.3 DT_0.21 writexl_1.4.0 emmeans_1.7.4-1
## [5] tidybayes_3.0.2 cowplot_1.1.1 mgcViz_0.1.9 qgam_1.3.4
## [9] mgcv_1.8-38 nlme_3.1-155 brms_2.16.1 Rcpp_1.0.8.3
## [13] readxl_1.3.1 stringr_1.4.0 ggplot2_3.3.6 forcats_0.5.1
## [17] purrr_0.3.4 tidyr_1.2.0 magrittr_2.0.2 dplyr_1.0.9
##
## loaded via a namespace (and not attached):
## [1] backports_1.4.1 systemfonts_1.0.4 plyr_1.8.6
## [4] igraph_1.2.11 svUnit_1.0.6 splines_4.1.2
## [7] crosstalk_1.2.0 TH.data_1.1-0 rstantools_2.1.1
## [10] inline_0.3.19 digest_0.6.29 foreach_1.5.2
## [13] htmltools_0.5.2 viridis_0.6.2 rsconnect_0.8.25
## [16] fansi_1.0.3 checkmate_2.1.0 doParallel_1.0.17
## [19] RcppParallel_5.1.4 matrixStats_0.61.0 xts_0.12.1
## [22] sandwich_3.0-1 prettyunits_1.1.1 colorspace_2.0-3
## [25] ggdist_3.0.1 textshaping_0.3.6 xfun_0.30
## [28] callr_3.7.0 crayon_1.5.1 jsonlite_1.8.0
## [31] lme4_1.1-28 survival_3.2-13 zoo_1.8-9
## [34] iterators_1.0.14 glue_1.6.2 gtable_0.3.0
## [37] distributional_0.3.0 pkgbuild_1.3.1 rstan_2.21.3
## [40] abind_1.4-5 scales_1.2.0 mvtnorm_1.1-3
## [43] DBI_1.1.2 GGally_2.1.2 miniUI_0.1.1.1
## [46] viridisLite_0.4.0 xtable_1.8-4 stats4_4.1.2
## [49] StanHeaders_2.21.0-7 htmlwidgets_1.5.4 threejs_0.3.3
## [52] arrayhelpers_1.1-0 RColorBrewer_1.1-2 posterior_1.2.1
## [55] ellipsis_0.3.2 pkgconfig_2.0.3 reshape_0.8.8
## [58] loo_2.5.0 farver_2.1.0 sass_0.4.0
## [61] utf8_1.2.2 labeling_0.4.2 tidyselect_1.1.2
## [64] rlang_1.0.2 reshape2_1.4.4 later_1.3.0
## [67] munsell_0.5.0 cellranger_1.1.0 tools_4.1.2
## [70] cli_3.3.0 generics_0.1.2 ggridges_0.5.3
## [73] evaluate_0.15 fastmap_1.1.0 ragg_1.2.1
## [76] yaml_2.3.5 processx_3.5.2 knitr_1.37
## [79] mime_0.12 projpred_2.0.2 compiler_4.1.2
## [82] bayesplot_1.8.1 shinythemes_1.2.0 rstudioapi_0.13
## [85] gamm4_0.2-6 tibble_3.1.7 bslib_0.3.1
## [88] highr_0.9 ps_1.6.0 Brobdingnag_1.2-7
## [91] lattice_0.20-45 Matrix_1.4-0 nloptr_2.0.0
## [94] markdown_1.1 shinyjs_2.1.0 tensorA_0.36.2
## [97] vctrs_0.4.1 pillar_1.7.0 lifecycle_1.0.1
## [100] jquerylib_0.1.4 bridgesampling_1.1-2 estimability_1.3
## [103] httpuv_1.6.2 R6_2.5.1 promises_1.2.0.1
## [106] KernSmooth_2.23-20 gridExtra_2.3 codetools_0.2-18
## [109] boot_1.3-28 colourpicker_1.1.0 MASS_7.3-55
## [112] gtools_3.9.2 assertthat_0.2.1 withr_2.5.0
## [115] shinystan_2.5.0 multcomp_1.4-18 parallel_4.1.2
## [118] grid_4.1.2 coda_0.19-4 minqa_1.2.4
## [121] rmarkdown_2.13 shiny_1.6.0 base64enc_0.1-3
## [124] dygraphs_1.1.1.6